# Load the modelling table; character columns are converted to factors.
# The steps in this script use column 1 as the response and all remaining
# columns as predictors.
# NOTE(review): the path is specific to one machine - adjust before running
# elsewhere.
Data <- read.csv(
  "C:/Users/neton/Documents/Machien_Learning_Mimi/AllData_global_treatmentB_onlyt1,t2,tavg.csv",
  header = TRUE,
  stringsAsFactors = TRUE
)
# Explicit print() so the preview is also shown when the script is run through
# source(), which does not auto-print top-level values by default.
print(head(Data))

library("randomForest")
library("caret")
library('pROC')

# Step 0 (warm-up): fit a single random forest through caret with arbitrarily
# chosen values (mtry = 4, ntree = 300), resampled with 10-fold
# cross-validation repeated 3 times.
control <- trainControl(
  method = "repeatedcv",
  number = 10,
  repeats = 3
)
tunegrid <- expand.grid(.mtry = 4)

# Seed set immediately before the fit.
set.seed(123)
RF0 <- train(
  x = Data[, -1],
  y = Data[, 1],
  method = "rf",
  ntree = 300,
  maxnodes = 8,
  trControl = control,
  tuneGrid = tunegrid
)
print(RF0)

# Step 1: tune mtry and ntree together.
# caret's "rf" method only tunes mtry through the grid, so the ntree values are
# covered by an outer loop: one train() run (mtry = 1..5) per ntree value.
control <- trainControl(
  method = "repeatedcv",
  number = 10,
  repeats = 3,
  search = "grid"
)
tunegrid <- expand.grid(.mtry = 1:5)

RFlist <- list()
for (ntree in c(100, 150, 200, 250, 300, 350, 400)) {
  # Reset the RNG before every fit so each run starts from the same random
  # state.
  set.seed(123)
  RFi <- train(
    x = Data[, -1],
    y = Data[, 1],
    method = "rf",
    ntree = ntree,
    maxnodes = 8,
    trControl = control,
    tuneGrid = tunegrid
  )
  # Store each fitted model under its ntree value (as a string key).
  RFlist[[toString(ntree)]] <- RFi
}

# Collect the resampling results of all fits and compare them.
# print() is explicit: the summary and the lattice dot plot are otherwise only
# rendered by top-level auto-printing, which source() does not do by default.
results <- resamples(RFlist)
print(summary(results))
print(dotplot(results))

# Step 2: find the best mtry (1..5) with ntree fixed at 250.
# (The previous header said ntree = 100, but the fit below - like steps 3 and
# 4 - uses 250.)
control <- trainControl(
  method = "repeatedcv",
  number = 10,
  repeats = 3,
  search = "grid"
)
tunegrid <- expand.grid(.mtry = 1:5)

set.seed(123)
RFj <- train(
  x = Data[, -1],
  y = Data[, 1],
  method = "rf",
  ntree = 250,
  maxnodes = 8,
  trControl = control,
  tuneGrid = tunegrid
)
print(RFj)
# plot() on a train object builds a lattice plot; print it explicitly so it is
# also drawn when the script is run through source().
print(plot(RFj))

# Step 3: refit through caret with the single chosen setting (mtry = 5,
# ntree = 250) so its resampled performance can be compared with the earlier
# fits.
control <- trainControl(
  method = "repeatedcv",
  number = 10,
  repeats = 3
)
tunegrid <- expand.grid(.mtry = 5)

set.seed(123)
RF1 <- train(
  x = Data[, -1],
  y = Data[, 1],
  method = "rf",
  ntree = 250,
  maxnodes = 8,
  trControl = control,
  tuneGrid = tunegrid
)
print(RF1)

# Step 4: fit the final forest directly with randomForest(), using the chosen
# combination mtry = 5 and ntree = 250 on the full data set.
set.seed(123)
RFfinal <- randomForest(
  x = Data[, -1],
  y = Data[, 1],
  ntree = 250,
  mtry = 5,
  maxnodes = 8,
  importance = TRUE, # keep the importance measures used further below
  proximity = TRUE
)
print(RFfinal)
plot(RFfinal)


# Variable importance of the final forest.

# First plot: only the four most important variables.
varImpPlot(RFfinal, sort = TRUE, n.var = 4, main = "Top 4 important variables")

# Second plot with default settings; varImpPlot() invisibly returns the
# importance measures it plotted, which are kept here as a data frame.
var.imp <- varImpPlot(RFfinal)
var.imp <- as.data.frame(var.imp)

# Write the importance table to a text file.
# capture.output() prints the value itself, so the file is filled even when the
# script is run through source(), and it restores console output on exit even
# if printing fails (a manual sink()/sink(NULL) pair stays open after an error).
capture.output(var.imp, file = "VariableImportanteGlobal.txt")

# ROC curve + AUC for the final forest.

# Class probabilities from the final forest. No newdata is supplied, so
# predict() returns the out-of-bag predictions for the training rows - despite
# its name, pred_test is not a held-out test set.
# The earlier call also passed `y = data[, 1]` (the base function `data`, not
# `Data`), `index = 2` and `predict.all = False` (an undefined symbol); none of
# these is needed for this prediction, and they would fail if ever evaluated,
# so they are dropped.
pred_test <- predict(RFfinal, type = "prob")
pred_head <- head(pred_test) # preview taken before the data-frame conversion

# NOTE(review): assumes Row_Labels is the response column of Data and M1 is one
# of its class labels - confirm against the CSV.
pred_test <- data.frame(pred_test)
pred_test_roc <- roc(Data$Row_Labels, pred_test$M1)

# Sensitivities and specificities along the ROC curve, as one-column tables.
textdata <- as.data.frame(pred_test_roc[["sensitivities"]])
textdataB <- as.data.frame(pred_test_roc[["specificities"]])

# Write the preview, both tables and the AUC to a text file, in that order.
# capture.output() prints each value itself (so this also works under
# source()) and restores console output on exit, even after an error.
capture.output(
  pred_head,
  textdata,
  textdataB,
  auc(pred_test_roc),
  file = "ROC+AUC.txt"
)

plot(pred_test_roc)
